Case Study: Time Series Analysis on Covid-19 2020 Dataset

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [2]:
# Read both CSV extracts in one statement: the per-country table first,
# then the worldwide table.
ca_df, ww_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'covid-19/countries-aggregated.csv',
        'covid-19/worldwide-aggregated.csv',
    )
)
In [3]:
# Print the (rows, columns) shape of the per-country table, then preview its first rows.
print(ca_df.shape)

ca_df.head()
(24816, 5)
Out[3]:
Date Country Confirmed Recovered Deaths
0 2020-01-22 Afghanistan 0 0 0
1 2020-01-22 Albania 0 0 0
2 2020-01-22 Algeria 0 0 0
3 2020-01-22 Andorra 0 0 0
4 2020-01-22 Angola 0 0 0
In [4]:
# Print the (rows, columns) shape of the worldwide table, then preview its first rows.
print(ww_df.shape)

ww_df.head()
(132, 5)
Out[4]:
Date Confirmed Recovered Deaths Increase rate
0 2020-01-22 555 28 17 NaN
1 2020-01-23 654 30 18 17.837838
2 2020-01-24 941 36 26 43.883792
3 2020-01-25 1434 39 42 52.391073
4 2020-01-26 2118 52 56 47.698745
In [5]:
# Summarise column dtypes and non-null counts of the per-country table.
ca_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24816 entries, 0 to 24815
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Date       24816 non-null  object
 1   Country    24816 non-null  object
 2   Confirmed  24816 non-null  int64 
 3   Recovered  24816 non-null  int64 
 4   Deaths     24816 non-null  int64 
dtypes: int64(3), object(2)
memory usage: 969.5+ KB
In [6]:
# Summarise column dtypes and non-null counts of the worldwide table.
ww_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132 entries, 0 to 131
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Date           132 non-null    object 
 1   Confirmed      132 non-null    int64  
 2   Recovered      132 non-null    int64  
 3   Deaths         132 non-null    int64  
 4   Increase rate  131 non-null    float64
dtypes: float64(1), int64(3), object(1)
memory usage: 5.3+ KB
In [7]:
# Parse the 'Date' strings into datetime values so the column can act as a
# real time axis, then confirm the resulting dtype.
ww_df = ww_df.assign(Date=pd.to_datetime(ww_df['Date']))

ww_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132 entries, 0 to 131
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           132 non-null    datetime64[ns]
 1   Confirmed      132 non-null    int64         
 2   Recovered      132 non-null    int64         
 3   Deaths         132 non-null    int64         
 4   Increase rate  131 non-null    float64       
dtypes: datetime64[ns](1), float64(1), int64(3)
memory usage: 5.3 KB
In [8]:
# Worldwide confirmed cases over time.
# An explicit Figure/Axes pair is used instead of the implicit pyplot state,
# and the chart carries a title so it can be read on its own.
fig, ax = plt.subplots(figsize=(16, 8))

sns.lineplot(x='Date', y='Confirmed', data=ww_df, ax=ax)

ax.set_title('Worldwide Confirmed Cases')

# Tilt the date tick labels so neighbouring labels do not overlap.
ax.tick_params(axis='x', labelrotation=45)

plt.show()
In [9]:
# Parse the 'Date' strings of the per-country table into datetime values,
# then confirm the resulting dtype.
ca_df = ca_df.assign(Date=pd.to_datetime(ca_df['Date']))

ca_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24816 entries, 0 to 24815
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       24816 non-null  datetime64[ns]
 1   Country    24816 non-null  object        
 2   Confirmed  24816 non-null  int64         
 3   Recovered  24816 non-null  int64         
 4   Deaths     24816 non-null  int64         
dtypes: datetime64[ns](1), int64(3), object(1)
memory usage: 969.5+ KB
In [10]:
# Group the per-country rows by 'Country'; group_ca_df is reused by later cells.
group_ca_df = ca_df.groupby('Country')

# first() displays the first non-null value of each column per country.
group_ca_df.first()
Out[10]:
Date Confirmed Recovered Deaths
Country
Afghanistan 2020-01-22 0 0 0
Albania 2020-01-22 0 0 0
Algeria 2020-01-22 0 0 0
Andorra 2020-01-22 0 0 0
Angola 2020-01-22 0 0 0
... ... ... ... ...
West Bank and Gaza 2020-01-22 0 0 0
Western Sahara 2020-01-22 0 0 0
Yemen 2020-01-22 0 0 0
Zambia 2020-01-22 0 0 0
Zimbabwe 2020-01-22 0 0 0

188 rows × 4 columns

In [11]:
# List the group labels, i.e. every distinct value of 'Country'.
group_ca_df.groups.keys()
Out[11]:
dict_keys(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada', 'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark', 'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Grenada', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Holy See', 'Honduras', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Korea, South', 'Kosovo', 'Kuwait', 'Kyrgyzstan', 'Laos', 'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Libya', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'MS Zaandam', 'Madagascar', 'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta', 'Mauritania', 'Mauritius', 'Mexico', 'Moldova', 'Monaco', 'Mongolia', 'Montenegro', 'Morocco', 'Mozambique', 'Namibia', 'Nepal', 'Netherlands', 'New Zealand', 'Nicaragua', 'Niger', 'Nigeria', 'North Macedonia', 'Norway', 'Oman', 'Pakistan', 'Panama', 'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Qatar', 'Romania', 'Russia', 'Rwanda', 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Vincent and the Grenadines', 'San Marino', 'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Serbia', 'Seychelles', 'Sierra Leone', 'Singapore', 'Slovakia', 'Slovenia', 'Somalia', 'South Africa', 'South Sudan', 'Spain', 'Sri Lanka', 
'Sudan', 'Suriname', 'Sweden', 'Switzerland', 'Syria', 'Taiwan*', 'Tajikistan', 'Tanzania', 'Thailand', 'Timor-Leste', 'Togo', 'Trinidad and Tobago', 'Tunisia', 'Turkey', 'US', 'Uganda', 'Ukraine', 'United Arab Emirates', 'United Kingdom', 'Uruguay', 'Uzbekistan', 'Venezuela', 'Vietnam', 'West Bank and Gaza', 'Western Sahara', 'Yemen', 'Zambia', 'Zimbabwe'])
In [12]:
# Confirmed-case curves for selected countries, one stacked panel per country.
# A single loop replaces three copy-pasted subplot stanzas, so adding or
# swapping a country only means editing this tuple.
countries = ('India', 'China', 'US')

# squeeze=False always yields a 2-D array of Axes, even for a single country.
fig, axes = plt.subplots(nrows=len(countries), ncols=1, figsize=(18, 14), squeeze=False)

# Extra vertical spacing keeps each panel's title clear of the panel above it.
fig.subplots_adjust(hspace=0.5)

for ax, country in zip(axes[:, 0], countries):
    sns.lineplot(x='Date', y='Confirmed', data=group_ca_df.get_group(country), ax=ax)
    ax.set_title(country)

plt.show()
In [13]:
# Keep only the rows of the three countries being compared.
# isin() expresses the membership test directly instead of chaining
# OR-ed equality comparisons; the selected rows are the same.
temp_df = ca_df.loc[ca_df['Country'].isin(['China', 'India', 'US'])]

temp_df.head()
Out[13]:
Date Country Confirmed Recovered Deaths
36 2020-01-22 China 548 28 17
79 2020-01-22 India 0 0 0
174 2020-01-22 US 1 0 0
224 2020-01-23 China 643 30 18
267 2020-01-23 India 0 0 0
In [14]:
# Summarise column dtypes and non-null counts of the three-country subset.
temp_df.info()
<class 'pandas.core.frame.DataFrame'>
Index: 396 entries, 36 to 24802
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       396 non-null    datetime64[ns]
 1   Country    396 non-null    object        
 2   Confirmed  396 non-null    int64         
 3   Recovered  396 non-null    int64         
 4   Deaths     396 non-null    int64         
dtypes: datetime64[ns](1), int64(3), object(1)
memory usage: 18.6+ KB
In [15]:
# Overlay the confirmed-case curves of the selected countries on one set of
# axes, with one colour per country.
fig, ax = plt.subplots(figsize=(16, 8))

sns.lineplot(data=temp_df, x='Date', y='Confirmed', hue='Country', ax=ax)

plt.show()

Plotly Express - Choropleth (Maps)

In [16]:
# Importing required Library

import plotly.express as px
In [17]:
# Group the per-country rows by 'Date'; group_df is reused by the next cell.
group_df = ca_df.groupby('Date')

# first() displays the first non-null value of each column per date.
group_df.first()
Out[17]:
Country Confirmed Recovered Deaths
Date
2020-01-22 Afghanistan 0 0 0
2020-01-23 Afghanistan 0 0 0
2020-01-24 Afghanistan 0 0 0
2020-01-25 Afghanistan 0 0 0
2020-01-26 Afghanistan 0 0 0
... ... ... ... ...
2020-05-28 Afghanistan 13036 1209 235
2020-05-29 Afghanistan 13659 1259 246
2020-05-30 Afghanistan 14525 1303 249
2020-05-31 Afghanistan 15205 1328 257
2020-06-01 Afghanistan 15750 1428 265

132 rows × 4 columns

In [18]:
# Pull out the rows of a single day for the static map in the next cell.
# NOTE(review): this rebinds temp_df, which previously held the China/India/US subset.
# NOTE(review): the key is a string while 'Date' was converted to datetime earlier;
# this relies on pandas coercing the key - confirm on the pandas version in use.
temp_df = group_df.get_group("2020-03-15")

temp_df.head()
Out[18]:
Date Country Confirmed Recovered Deaths
9964 2020-03-15 Afghanistan 16 0 0
9965 2020-03-15 Albania 42 0 1
9966 2020-03-15 Algeria 48 12 4
9967 2020-03-15 Andorra 1 1 0
9968 2020-03-15 Angola 0 0 0
In [19]:
# World map of confirmed cases for the single day held in temp_df.
# hover_name is the bold headline of each tooltip, so it carries the country
# name rather than repeating the confirmed count already shown via `color`.
fig = px.choropleth(
    temp_df,
    locations='Country',
    locationmode='country names',  # match map regions by the names in 'Country'
    color='Confirmed',
    hover_name='Country',
)
fig.update_layout(width=1000, height=500)

fig.show()
In [20]:
# Animated world map of confirmed cases, one frame per value of 'Date'.
# hover_name is the bold headline of each tooltip, so it carries the country
# name rather than repeating the confirmed count already shown via `color`.
fig = px.choropleth(
    ca_df,
    locations='Country',
    locationmode='country names',  # match map regions by the names in 'Country'
    color='Confirmed',
    hover_name='Country',
    animation_frame='Date',
)
fig.update_layout(width=1000, height=500)
fig.show()
In [21]:
# Animated world map of confirmed cases using the sequential OrRd colour scale.
# hover_name is the bold headline of each tooltip, so it carries the country
# name rather than repeating the confirmed count already shown via `color`.
fig = px.choropleth(
    ca_df,
    locations='Country',
    locationmode='country names',  # match map regions by the names in 'Country'
    color='Confirmed',
    hover_name='Country',
    animation_frame='Date',
    color_continuous_scale=px.colors.sequential.OrRd,
)
fig.update_layout(width=1000, height=500)
fig.show()
In [22]:
# Animated map of confirmed cases restricted to the 'asia' scope, OrRd colour scale.
# hover_name is the bold headline of each tooltip, so it carries the country
# name rather than repeating the confirmed count already shown via `color`.
fig = px.choropleth(
    ca_df,
    locations='Country',
    locationmode='country names',  # match map regions by the names in 'Country'
    color='Confirmed',
    hover_name='Country',
    animation_frame='Date',
    color_continuous_scale=px.colors.sequential.OrRd,
    scope='asia',
)
fig.update_layout(width=1000, height=500)
fig.show()
In [ ]: